# -*- coding: utf-8 -*-
import datetime
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
from traceback import format_exc
from zc_core.util.batch_gen import batch_to_year
from zc_core.util.done_filter import DoneFilter
from zc_core.model.items import *

from plap.items import SpuPageLog, ItemDataLog


class SpuPageLogPipeline(object):
    """Scrapy item pipeline that upserts crawl-log items into MongoDB.

    Two item types are handled:

    * ``SpuPageLog``  -> collection ``spu_page_log_<batch_no>``
    * ``ItemDataLog`` -> collection ``item_log_<batch_no>``

    Databases are named ``<bot_name>_<year>``; the year is derived from the
    batch number through ``batch_to_year``. Items of any other type are
    passed through untouched.
    """

    def __init__(self, mongo_uri, bot_name):
        """Store the connection settings; no connection is opened here.

        :param mongo_uri: MongoDB connection URI handed to ``MongoClient``.
        :param bot_name: prefix used when building database names.
        """
        self.mongo_uri = mongo_uri
        self.bot_name = bot_name
        self.client = None
        # Cache of database handles keyed by year.
        self.db_map = dict()
        # Created in open_spider(); declared here so the attributes always
        # exist, even if open_spider() never ran.
        self.spu_done_filter = None
        self.item_done_filter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MONGODB_URI`` and ``BOT_NAME`` settings."""
        settings = crawler.settings
        return cls(
            mongo_uri=settings.get('MONGODB_URI'),
            bot_name=settings.get('BOT_NAME')
        )

    def open_spider(self, spider):
        """Connect to MongoDB and create the per-batch done-filters.

        :param spider: running spider; its ``batch_no`` attribute is used to
            name the done-filter collections.
        """
        self.client = MongoClient(self.mongo_uri)
        # Initialise the current year's database by default.
        year = str(datetime.datetime.now().year)
        self.db_map[year] = self.client['{}_{}'.format(self.bot_name, year)]
        # NOTE(review): DoneFilter presumably tracks ids that were already
        # written so they are not written twice -- confirm against zc_core.
        self.spu_done_filter = DoneFilter(coll_name='spu_page_log_{}'.format(spider.batch_no))
        self.item_done_filter = DoneFilter(coll_name='item_log_{}'.format(spider.batch_no))

    def get_db(self, batch_no):
        """Return the database handle for the year of ``batch_no``.

        Handles are created on first use and cached in ``self.db_map``.
        """
        year = batch_to_year(batch_no)
        db = self.db_map.get(year)
        # pymongo Database objects must be compared with None explicitly;
        # truth-testing them raises NotImplementedError on PyMongo 4.
        if db is None:
            db = self.client['{}_{}'.format(self.bot_name, year)]
            self.db_map[year] = db

        return db

    def close_spider(self, spider):
        """Close the MongoDB client if one was opened."""
        # The client is None when open_spider() never ran or failed early.
        if self.client is not None:
            self.client.close()

    def process_item(self, item, spider):
        """Persist ``SpuPageLog`` / ``ItemDataLog`` items and return the item.

        Errors are logged through ``spider.logger`` and never propagate, so
        the item is always returned to the rest of the pipeline chain.

        NOTE(review): the ``DropItem`` raised for an empty item is caught by
        the generic ``except Exception`` below, so empty items are logged and
        returned rather than dropped. Kept as-is because the origin of
        ``DropItem`` is not visible in this file -- confirm the intent.
        """
        try:
            if not item:
                raise DropItem("drop empty item:  [{}]".format(item))

            if isinstance(item, SpuPageLog):
                batch_no = spider.batch_no
                if item.validate() and batch_no:
                    self._save_spu_page_log(item, batch_no)
                    return item
            if isinstance(item, ItemDataLog):
                batch_no = spider.batch_no
                if item.validate() and batch_no:
                    self._save_item_log(item, batch_no)
                    return item
        except DuplicateKeyError:
            spider.logger.debug('duplicate key error collection')
        except Exception:
            spider.logger.error(format_exc())
        return item

    def _save_spu_page_log(self, item, batch_no):
        """Upsert one ``SpuPageLog`` item, deriving ``_id`` when it is missing."""
        cat_id = item.get('catId', '')
        page = item.get('page', '')
        _id = item.get('_id', None)
        if not _id and page and cat_id:
            # Fall back to a deterministic id built from category and page.
            _id = 'c{}_p{}'.format(cat_id, page)
            item['_id'] = _id
        if _id and not self.spu_done_filter.contains(_id):
            collection = self.get_db(batch_no)['spu_page_log_{}'.format(batch_no)]
            # update_one() replaces Collection.update(), which is deprecated
            # in PyMongo 3 and removed in PyMongo 4.
            collection.update_one({'_id': _id}, {'$set': item}, upsert=True)
            self.spu_done_filter.put(_id)

    def _save_item_log(self, item, batch_no):
        """Upsert one ``ItemDataLog`` item keyed by its ``spuId``.

        ``spuId`` is popped from the item and used as the document ``_id``,
        so the item returned downstream no longer carries that field.
        """
        spu_id = item.pop('spuId', '')
        if spu_id and not self.item_done_filter.contains(spu_id):
            collection = self.get_db(batch_no)['item_log_{}'.format(batch_no)]
            collection.update_one({'_id': spu_id}, {'$set': item}, upsert=True)
            self.item_done_filter.put(spu_id)
